import pandas as pd
import matplotlib.pyplot as plt


# Input file and its column layout. The file is '#'-separated and is read
# with explicit column names, so its first line is treated as data.
CSV_PATH = "top250-1.csv"
CSV_COLUMNS = ['num', 'title', 'year', 'area', 'genre',
               'rating_num', 'comment_num', 'href']

# Areas whose share of all rows is strictly below this fraction (3%) are
# folded into a single slice labelled OTHER_LABEL.
SMALL_SHARE_THRESHOLD = 0.03
OTHER_LABEL = '其他'


def load_movies(path=CSV_PATH):
    """Read the '#'-separated movie list into a DataFrame.

    Args:
        path: Location of the CSV file; defaults to ``CSV_PATH``.

    Returns:
        A DataFrame whose columns are named according to ``CSV_COLUMNS``.
    """
    return pd.read_csv(path, sep="#", encoding='utf-8', names=CSV_COLUMNS)


def merge_small_areas(areas, threshold=SMALL_SHARE_THRESHOLD,
                      other_label=OTHER_LABEL):
    """Count rows per area, folding rare areas into one bucket.

    Args:
        areas: Series holding one area value per row.
        threshold: Areas whose share of the counted (non-missing) rows is
            strictly below this fraction are merged.
        other_label: Label that replaces every merged area.

    Returns:
        A Series of counts indexed by area label, as produced by
        ``Series.value_counts`` (descending by count). Missing values are
        not counted.
    """
    counts = areas.value_counts()
    shares = counts / counts.sum()
    rare_areas = shares[shares < threshold].index

    # Vectorised replacement: keep every value that is not a rare area and
    # substitute the bucket label for the rest. This avoids a per-row Python
    # callback doing a linear list lookup.
    merged = areas.where(~areas.isin(rare_areas), other_label)
    return merged.value_counts()


def plot_area_share(area_counts):
    """Show a pie chart of the share of each area.

    Args:
        area_counts: Series of counts indexed by area label; the index
            supplies the slice labels.
    """
    # The labels and title contain Chinese text; STSong is presumably chosen
    # because it provides those glyphs -- confirm it is installed locally.
    plt.rcParams['font.sans-serif'] = ['STSong']
    # Render minus signs with the ASCII hyphen instead of the Unicode minus.
    plt.rcParams['axes.unicode_minus'] = False

    plt.pie(area_counts, labels=area_counts.index, autopct='%1.1f%%')
    plt.title('Top 250电影地区国家占比')
    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    plt.show()


def main():
    """Load the movie list and display the per-area share pie chart."""
    movies = load_movies()
    plot_area_share(merge_small_areas(movies['area']))


if __name__ == "__main__":
    main()